## Realistic data simulations
## 28 October 2009

library(moments)
library(gregmisc)
library(RItools)

## Create many data sets, one SB and one CR experiment from each:
real.sims.many.data.sets <- function(dir.names, cont, exact, howmanysets = 100, units = 40, stat = "mean"){
	for(i in 1:length(dir.names)){
		outfileDir <- dir.names[i]

		if(i %in% grep("k", nm.grid[, 1])){
			methd <- "ktimes"
			kf <- as.integer(substr(nm.grid[i, 1], 2,2))
		}
		if(nm.grid[i, 1] == "prop"){
			methd <- "prop"
		}
		if(nm.grid[i, 1] == "wprop"){
			methd <- "wprop"
		}
		whenoutlier <- nm.grid[i, 2]
	
		for(idx.sets in 1:howmanysets){
		
			gender <- sample (1:2, units, rep=T)
			MDDdx <- sample (0:1, units, rep=T)
			pssi <- sample (10:51, units, rep=T)
			age <- sample (18:65, units, rep=T)
			phq9sev <- sample (0:27, units, rep=T)
			FAS <- sample (10:80, units, rep=T)
			StroopInt <- sample (21:80, units, rep=T)
			LM1 <- sample (1:19, units, rep=T)
			LM2 <- sample (1:19, units, rep=T)
			curr.set <- data.frame(id = 901:(900+units), gender=gender, MDDdx=MDDdx, pssi=pssi, age=age, phq9sev=phq9sev, FAS=FAS, StroopInt=StroopInt, LM1=LM1, LM2=LM2)

			seqblock1(id.vars = "id", id.vals = curr.set[1, "id"], exact.vars = exact, exact.vals = curr.set[1, exact], covar.vars = cont, covar.vals=curr.set[1, cont], n.tr = 2, assg.prob.stat = stat, assg.prob.method = methd, assg.prob.kfac = kf, file.name = paste(outfileDir, "/sim", idx.sets, ".RData", sep=""), verbose = F)				
			for(idx.units in 2:nrow(curr.set)){
				if(whenoutlier == idx.units){
					seqblock2k(object= paste(outfileDir, "/sim", idx.sets, ".RData", sep=""), id.vals = curr.set[idx.units, "id"], exact.vals = curr.set[idx.units, exact], covar.vals = curr.set[idx.units, cont]*5, file.name = paste(outfileDir, "/sim", idx.sets, ".RData", sep=""), verbose=F)				}else{
					seqblock2k(object= paste(outfileDir, "/sim", idx.sets, ".RData", sep=""), id.vals = curr.set[idx.units, "id"], exact.vals = curr.set[idx.units, exact], covar.vals = curr.set[idx.units, cont], file.name = paste(outfileDir, "/sim", idx.sets, ".RData", sep=""), verbose=F)
				}
			}
		}
	}	
}

meth.names <- c("k2", "k3", "k4", "k5", "k6", "prop", "wprop")
out.names <- c(0, 2, 20, 35)
cont <- c("pssi", "age", "phq9sev", "FAS", "StroopInt", "LM1", "LM2")
exact <- c("gender", "MDDdx")

## Create directory structure for storing output:
nm.grid <- expand.grid(meth.names, out.names)
dir.names <- array(NA, nrow(nm.grid))
for(i in 1:length(dir.names)){
	dir.names[i] <- paste("simsRealHanBow", nm.grid[i, 1], "out", nm.grid[i, 2], sep = "")
	mkdir.system.arg <- paste("mkdir ", dir.names[i], sep = "")
	system(mkdir.system.arg)
}

## Populate directories:
real.sims.many.data.sets(dir.names, cont, exact, howmanysets = 100, units = 40, stat = "mean")

wkspc.names <- paste("out", nm.grid[, 2], nm.grid[, 1], sep = "")

wkspc.list <- list()
for(i in 1:length(dir.names)){
		assign(wkspc.names[i], hanbowTCrealLoop(dir.names[i]))
		wkspc.list[[i]] <- get(wkspc.names[i])
}

names(wkspc.list) <- wkspc.names

## average outlier==TRUE sims:
wkspc.outliers.only <- wkspc.list[-grep("out0", wkspc.names)]
k2 <- unname(unlist(wkspc.outliers.only[grep("k2", names(wkspc.outliers.only))]))
k3 <- unname(unlist(wkspc.outliers.only[grep("k3", names(wkspc.outliers.only))]))
k4 <- unname(unlist(wkspc.outliers.only[grep("k4", names(wkspc.outliers.only))]))
k5 <- unname(unlist(wkspc.outliers.only[grep("k5", names(wkspc.outliers.only))]))
k6 <- unname(unlist(wkspc.outliers.only[grep("k6", names(wkspc.outliers.only))]))
wprop <- unname(unlist(wkspc.outliers.only[grep("wprop", names(wkspc.outliers.only))]))
prop <- unname(unlist(wkspc.outliers.only[names(wkspc.outliers.only)[substr(names(wkspc.outliers.only), 6, 9) == "prop"]]))

## Figure 4 (left):
boxplot(wkspc.list[grep("out0", wkspc.names)], notch=T, ylab="d^2 p-values", main="No outliers", names=meth.names)

## Figure 4 (right):
boxplot(list(k2, k3, k4, k5, k6, prop, wprop), notch=T, ylab="d^2 p-values", main="Outliers only", names= meth.names)
dev.off()

